* Numeric ballot identifier: one group per distinct combination of the listed
* variables. The -missing- option makes missing values form their own groups
* instead of producing a missing identifier.
* NOTE(review): the collapse later in this file uses by(ballots), not
* by(ballots_2021) -- confirm which identifier is intended.
egen ballots_2021 = group(v05 v03 v07 v09 v08 v10a v10b v10c), missing

* ---- Candidate-level variables prepared for the ballot-level collapse ----
* Each indicator below is the value of a logical expression: 1 when the
* condition holds, 0 otherwise (a missing input therefore yields 0, not missing).

* 1 when partycomb==2 (per the original note: a Republican or Independent is running)
gen repind_pres = (partycomb==2)

* Copy of v23 kept only on rows with partycomb==1; every other row is set to 0
clonevar d_votes = v23
replace d_votes = 0 if !(partycomb==1)

* Copy of v23 kept only on rows with partycomb==2; every other row is set to 0
clonevar ri_votes = v23
replace ri_votes = 0 if !(partycomb==2)

* 1 when basket_latw==1 and partycomb==2
gen b_latwR = (basket_latw==1 & partycomb==2)

* Constant 1 per row; summed in the collapse to count candidates on each
* ballot (a redo of candidates-per-seat)
gen candidate_dummy = 1

* Winner indicators (v24==1) by basket; used to indicate whether the winner
* was a woman / a man once the data are collapsed.
* NOTE(review): man_win does not appear in the collapse variable list in this
* section -- confirm whether it should be carried through.
gen woman_win = (basket_women==1 & v24==1)
gen man_win   = (basket_men==1 & v24==1)

* 1 when an incumbent (v22==1) won (v24==1)
gen inc_win = (v22==1 & v24==1)

* Incumbent-by-group indicators (v22==1 combined with a basket flag). These are
* summed at the ballot level later and used to compute what share of a
* state-year's incumbents belong to each group.
* Each indicator is 1 when the condition holds and 0 otherwise (missing inputs give 0).

* Incumbent in any of the lat / afam / asam baskets
gen inc_poc = (v22==1 & (basket_lat==1 | basket_afam==1 | basket_asam==1))

* Incumbent in a single basket: creates inc_blat, inc_blatw, inc_blatm, inc_bwomen
foreach grp in lat latw latm women {
    gen inc_b`grp' = (basket_`grp'==1 & v22==1)
}

* Incumbent in any of the latw / asamw / afamw baskets
gen inc_bwoc = (v22==1 & (basket_latw==1 | basket_asamw==1 | basket_afamw==1))

* Incumbent in any of the latm / asamm / afamm baskets
gen inc_bmoc = (v22==1 & (basket_latm==1 | basket_asamm==1 | basket_afamm==1))

* State-year identifiers: a numeric group id over v05 and v03 (missing values
* form their own groups) and a string built by concatenating v02 and v05.
* NOTE(review): the string version combines v02/v05 while the numeric id groups
* v05/v03 -- confirm both are meant to describe the same state-year.
egen stateyear = group(v05 v03), missing
egen stateyearstring = concat(v02 v05)

* Non-incumbent (v22==0) Latina / Latino on the ballot: creates new_latw and new_latm
foreach sfx in w m {
    gen new_lat`sfx' = (v22==0 & basket_lat`sfx'==1)
}



* Collapse the candidate-level rows to one row per ballot.
*   (max): variables that are constant within a ballot, or 0/1 dummies, so the
*          ballot value is 1 if any row on the ballot has a 1.
*   (sum): per-ballot counts (e.g. how many Latinas/Latinos are on the ballot)
*          and vote totals (d_votes, ri_votes, v23), which feed the measure of
*          Democratic partisan strength.
* Several inputs (repind_win, the b_*winner flags, b_latmD, b_latwD, b_latmR,
* and the district covariates) are not created in this section and must
* already exist in the data.
* NOTE(review): by(ballots) resolves to ballots_2021 only through variable
* abbreviation; if a variable named exactly "ballots" exists, that one is used
* instead -- confirm which is intended.
collapse ///
    (max) inc_win woman_win ///
          afampoppct asampoppct latpoppct whpoppct pocpoppct ///
          latcvappct whcvappct afamcvappct asamcvappct othcvappct ///
          tlimit legprof demvoteshareballot_2020 distrepubstr ///
          openelect multimem allincfmt educcolldeg unemppct ///
          v05 v03 v07 v09 repind_win ///
          b_afamwwinner b_afammwinner b_asamwwinner b_asammwinner ///
          b_latwwinner b_latmwinner b_whwwinner b_whmwinner ///
          repind_pres stateyear ///
    (sum) ballot_newlatwcount=new_latw ballot_newlatmcount=new_latm ///
          b_incwom=inc_bwomen b_incbwoc=inc_bwoc b_incbmoc=inc_bmoc ///
          b_inclatm=inc_blatm b_inclatw=inc_blatw b_incpoc=inc_poc ///
          b_inclat=inc_blat ///
          v22 d_votes ri_votes v23 ///
          candperseat=candidate_dummy ///
          ballot_latwcount=basket_latw ballot_latmcount=basket_latm ///
          ballot_womencount=basket_women ///
          b_latmD b_latwD b_latmR b_latwR ///
          basket_afamw basket_afamm ///
    , by(ballots)


* Ballot-level presence dummies derived from the summed candidate counts.
* Each dummy is the value of a logical expression: 1 when true, 0 otherwise.

* At least one Latina / Latino candidate on the ballot
gen ballot_latwpres = (ballot_latwcount != 0)
gen ballot_latmpres = (ballot_latmcount != 0)

* At least one Latina or Latino candidate on the ballot
gen ballot_latallpres = (ballot_latwcount != 0 | ballot_latmcount != 0)

* At least one non-incumbent Latina / Latino candidate on the ballot
gen ballot_newlatwpres = (ballot_newlatwcount != 0)
gen ballot_newlatmpres = (ballot_newlatmcount != 0)

* Same as above, restricted to ballots where pocpoppct is at least .5.
* Stata treats missing values as larger than any number, so a bare
* "pocpoppct >= .5" is TRUE when pocpoppct is missing; the !missing() guard
* keeps ballots with an unknown share from being flagged. Such ballots are
* coded 0, matching how every other unknown is coded in this file.
* NOTE(review): the .5 cutoff assumes the share is stored on a 0-1 scale -- confirm.
gen ballot_newlatwpresMMD = (ballot_newlatwcount != 0 & pocpoppct >= .5 & !missing(pocpoppct))
gen ballot_newlatmpresMMD = (ballot_newlatmcount != 0 & pocpoppct >= .5 & !missing(pocpoppct))

* Same restriction using latpoppct (same missing-value guard and scale caveat).
gen ballot_newlatwpresLAT = (ballot_newlatwcount != 0 & latpoppct >= .5 & !missing(latpoppct))
gen ballot_newlatmpresLAT = (ballot_newlatmcount != 0 & latpoppct >= .5 & !missing(latpoppct))

* State-year incumbent composition.
* For each group, stateyr_inc<grp> is the state-year total of the ballot-level
* incumbent count for that group, and stateyear_inc<grp>pct is that total
* divided by the state-year total of v22 (all incumbents). When the ratio is
* missing (e.g. the denominator is zero) the share is set to 0.

* People-of-colour incumbents, followed by the common denominator
egen stateyr_incpoc = total(b_incpoc), by(stateyear)
egen stateyr_inc    = total(v22), by(stateyear)
gen stateyear_incpocpct = stateyr_incpoc / stateyr_inc
replace stateyear_incpocpct = 0 if missing(stateyear_incpocpct)

* Remaining groups, written as "<suffix> <ballot-level source variable>" pairs
foreach pair in "woc b_incbwoc" "moc b_incbmoc" "wom b_incwom" ///
                "lat b_inclat" "latw b_inclatw" "latm b_inclatm" {
    gettoken grp src : pair
    egen stateyr_inc`grp' = total(`src'), by(stateyear)
    gen stateyear_inc`grp'pct = stateyr_inc`grp' / stateyr_inc
    replace stateyear_inc`grp'pct = 0 if missing(stateyear_inc`grp'pct)
}

